libraries needed:
library(base)
library(datasets)
library(Biobase)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
library(GEOquery)
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
library(Seurat)
## Attaching SeuratObject
library(CellChat)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: igraph
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:BiocGenerics':
##
## normalize, path, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
## Loading required package: ggplot2
library(dplyr)
library(patchwork)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ tibble 3.1.8 ✔ purrr 1.0.1
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tibble::as_data_frame() masks igraph::as_data_frame(), dplyr::as_data_frame()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ purrr::compose() masks igraph::compose()
## ✖ tidyr::crossing() masks igraph::crossing()
## ✖ dplyr::filter() masks stats::filter()
## ✖ igraph::groups() masks dplyr::groups()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::simplify() masks igraph::simplify()
library(magrittr)
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
#library(liana)
library(ggplot2)
library(devtools)
## Loading required package: usethis
mosk.data <- Read10X(data.dir = "/Users/xiaoyizheng/Downloads/Praktikum/Bioinfo_Einarbeiten/CellChatSelf/mouse/GSE113854_RAW/")
#mosk.data
mosk <- CreateSeuratObject(counts = mosk.data, project = "MoSk", min.cells = 3, min.features = 200)
mosk
## An object of class Seurat
## 17090 features across 22322 samples within 1 assay
## Active assay: RNA (17090 features, 0 variable features)
to-do: quality control
mosk[["percent.mt"]] <- PercentageFeatureSet(mosk, pattern = "^MT-") #mitoc. genes -> Zeichen fuer schlechte Qualitaet
VlnPlot(mosk, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) #All cells have the same value of percent.mt.(maybe bc already filtered?)
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.mt.
head(mosk@meta.data, 5) #Show QC metrics for the first 5 cells
## orig.ident nCount_RNA nFeature_RNA percent.mt
## AAACCTGAGAATTCCC-1 MoSk 1475 783 0
## AAACCTGAGACCGGAT-1 MoSk 4148 1624 0
## AAACCTGAGACGCACA-1 MoSk 3136 1342 0
## AAACCTGAGATGTGGC-1 MoSk 3138 1323 0
## AAACCTGAGCTGCGAA-1 MoSk 4029 1721 0
mosk.data[c("Igf2", "Cck", "Pf4"), 1:30]
## 3 x 30 sparse Matrix of class "dgCMatrix"
## [[ suppressing 30 column names 'AAACCTGAGAATTCCC-1', 'AAACCTGAGACCGGAT-1', 'AAACCTGAGACGCACA-1' ... ]]
##
## Igf2 . . . . . 1 . . . . . . . . . . . 1 . . . . . . . . . . . .
## Cck . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## Pf4 . . . . . . . . 11 . . . . . 25 . . 1 1 . 1 1 . 1 . . . . . 2
plot1 <- FeatureScatter(mosk, feature1 = "nCount_RNA", feature2 = "percent.mt") #since all have same mt quality...
## Warning in cor(x = data[, 1], y = data[, 2]): the standard deviation is zero
plot2 <- FeatureScatter(mosk, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") #positively correlated
plot1 + plot2
#filter "mosk"
mosk <- subset(mosk, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5) #hmmm but mt for filtering unneccessary
mosk <- NormalizeData(mosk, normalization.method = "LogNormalize", scale.factor = 10000)
mosk <- FindVariableFeatures(mosk, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(mosk), 10) # Identify the 10 most highly variable genes
top10
## [1] "Hbb-bs" "Hba-a1" "Ccl21a" "Hbb-bt" "Saa3" "Acp5" "Ccl5" "S100a8"
## [9] "Mmp9" "S100a9"
plot1 <- VariableFeaturePlot(mosk)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot1 + plot2
all.genes <- rownames(mosk)
mosk <- ScaleData(mosk, features = all.genes)
## Centering and scaling data matrix
mosk <- RunPCA(mosk, features = VariableFeatures(object = mosk))
## PC_ 1
## Positive: Col1a1, Col1a2, Col3a1, Sparc, Bgn, Col5a2, Dcn, Meg3, Postn, Col6a1
## Col6a2, Fstl1, Col12a1, Col5a1, Aebp1, Lum, Serpinh1, Mfap4, Col6a3, Mmp2
## Thbs2, Gpx3, Fbln2, Serpinf1, Tmsb10, Fn1, Lgals1, Lrrc15, Itm2a, Timp2
## Negative: Fth1, Tyrobp, Fcer1g, Lyz2, Ftl1, Tmsb4x, Ctss, H2-D1, C1qb, Apoe
## C1qa, C1qc, Cd52, Wfdc17, Ptpn18, Pf4, Lgals3, Laptm5, Ms4a7, Aif1
## Cd74, Clec4n, Ucp2, Lst1, Ms4a6c, Cd68, Id2, Alox5ap, Cd14, Fxyd5
## PC_ 2
## Positive: Col4a1, Col4a2, Cdh5, Igfbp7, Pecam1, Egfl7, Sparcl1, Crip2, Plvap, Ramp2
## Col15a1, Col18a1, Cav1, Ctla2a, Emcn, Myct1, Adgrf5, Adgrl4, Kdr, Cd93
## Esam, Mcam, Ly6c1, Pdlim1, Fabp4, Cldn5, Mmrn2, Gng11, Tinagl1, Flt1
## Negative: Tyrobp, Fcer1g, Lyz2, Lgals3, C1qb, Ctss, Apoe, Ftl1, C1qa, C1qc
## Fth1, Pf4, Wfdc17, Ms4a7, Lst1, Cd52, Aif1, Lgals1, Cstb, Cd74
## Laptm5, Clec4n, Id2, Ifi27l2a, Cd68, Ms4a6c, Alox5ap, Cd14, Csf1r, Cyba
## PC_ 3
## Positive: Rgs5, Ndufa4l2, Gm13889, Higd1b, Des, Notch3, Serpine2, Cox4i2, Il6, Acta2
## Mylk, Rasgrp2, Cygb, Thy1, Ebf1, Ppp1r14a, Crip1, Mgp, Col4a1, Mustn1
## Pdgfa, Myl9, S100a4, Phlda1, Abcc9, Tppp3, Col4a2, Gucy1a3, Parm1, Actg2
## Negative: Cdh5, Pecam1, Ctla2a, Egfl7, Cd93, Ramp2, Cd34, Ly6c1, Myct1, Kdr
## S100a16, Mfap4, Adgrl4, Emcn, Igf1, Mmrn2, Lum, Cldn5, Igfbp3, Ly6a
## Mest, Plvap, Ptprb, Ptn, Gpihbp1, Tie1, Fbln2, Gja1, Aebp1, Icam2
## PC_ 4
## Positive: Malat1, Il6, Cxcl1, Meg3, Serping1, Mt1, Gm13889, Rgs5, Col14a1, Errfi1
## Sparcl1, Ogn, Eln, Neat1, Ccl2, Mt2, Klf4, Nov, Clec3b, Fmod
## Col4a1, Ebf1, Procr, Fosb, H19, Id3, Col4a2, Dnajb1, Fxyd1, Cygb
## Negative: 2810417H13Rik, Birc5, Stmn1, Ube2c, Top2a, Tuba1b, Hmgb2, Cenpa, Cdca3, H2afz
## Cks2, Ccnb2, Prc1, Cks1b, Ccna2, 2700094K13Rik, Cenpf, Cdca8, Smc2, Cdk1
## Ccnb1, Lockd, Mki67, Nusap1, Tpx2, Spc24, Tubb5, Tk1, H2afx, Cenpm
## PC_ 5
## Positive: Pf4, Apoe, Sepp1, C1qc, C1qb, C1qa, Cxcl1, Ctsd, Ms4a7, Neat1
## Lyz2, Lgmn, Fosb, Gas6, Klf4, Syngr1, Cxcl2, Ier3, Mrc1, Ccl7
## H19, Ftl1, Trem2, Ccl2, Col14a1, Igfbp4, Lst1, Top2a, Igf1, Slc40a1
## Negative: Tmsb10, S100a6, H2-Eb1, Ccr7, Samsn1, H2-Aa, Ramp3, H2-Ab1, Crabp1, Klrd1
## Bcl2a1d, Tbc1d4, Tmsb4x, Icos, Ifitm1, Napsa, Ptprcap, Cd209a, Cytip, Cd52
## H2-DMb1, Tnfrsf18, H2-DMa, AW112010, Cd74, Il1r2, Cd7, Cd3d, Tnfrsf4, Tnfrsf9
print(mosk[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1
## Positive: Col1a1, Col1a2, Col3a1, Sparc, Bgn
## Negative: Fth1, Tyrobp, Fcer1g, Lyz2, Ftl1
## PC_ 2
## Positive: Col4a1, Col4a2, Cdh5, Igfbp7, Pecam1
## Negative: Tyrobp, Fcer1g, Lyz2, Lgals3, C1qb
## PC_ 3
## Positive: Rgs5, Ndufa4l2, Gm13889, Higd1b, Des
## Negative: Cdh5, Pecam1, Ctla2a, Egfl7, Cd93
## PC_ 4
## Positive: Malat1, Il6, Cxcl1, Meg3, Serping1
## Negative: 2810417H13Rik, Birc5, Stmn1, Ube2c, Top2a
## PC_ 5
## Positive: Pf4, Apoe, Sepp1, C1qc, C1qb
## Negative: Tmsb10, S100a6, H2-Eb1, Ccr7, Samsn1
VizDimLoadings(mosk, dims = 1:2, reduction = "pca")
DimPlot(mosk, reduction = "pca")
#DimPlot(mosk, reduction = "pca",label = TRUE)
DimHeatmap(mosk, dims = 1, cells = 500, balanced = TRUE) #heatmap: exploration of the primary sources of heterogeneity
DimHeatmap(mosk, dims = 1:15, cells = 500, balanced = TRUE)
mosk <- JackStraw(mosk, num.replicate = 100)
mosk <- ScoreJackStraw(mosk, dims = 1:20)
JackStrawPlot(mosk, dims = 1:15)
## Warning: Removed 21000 rows containing missing values (`geom_point()`).
ElbowPlot(mosk)
mosk <- FindNeighbors(mosk, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
mosk <- FindClusters(mosk, resolution = 0.3) #13 clusters aus paper #res = 0.3 cluster 0->12
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 22172
## Number of edges: 707614
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9307
## Number of communities: 13
## Elapsed time: 4 seconds
head(Idents(mosk), 5)
## AAACCTGAGAATTCCC-1 AAACCTGAGACCGGAT-1 AAACCTGAGACGCACA-1 AAACCTGAGATGTGGC-1
## 3 9 8 0
## AAACCTGAGCTGCGAA-1
## 6
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12
mosk <- RunUMAP(mosk, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 09:24:17 UMAP embedding parameters a = 0.9922 b = 1.112
## 09:24:17 Read 22172 rows and found 10 numeric columns
## 09:24:17 Using Annoy for neighbor search, n_neighbors = 30
## 09:24:17 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 09:24:19 Writing NN index file to temp file /var/folders/w_/82bj411144v93vqxt1tbsg9h0000gn/T//RtmpxwQyaP/filed497843b666
## 09:24:19 Searching Annoy index using 1 thread, search_k = 3000
## 09:24:27 Annoy recall = 100%
## 09:24:28 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 09:24:29 Initializing from normalized Laplacian + noise (using RSpectra)
## 09:24:30 Commencing optimization for 200 epochs, with 907130 positive edges
## 09:24:42 Optimization finished
DimPlot(mosk, reduction = "umap", label = TRUE)
##saveRDS(mosk, file = "/Users/xiaoyizheng/Downloads/Bioinfo_Einarbeiten/moskOutput.rds")
###retrived from CellChat paper summplm.
“Crabp1”, “Des”, “Rgs5”, “C1qb”, “Pf4”, “Eln”, “Ogn”, “Cd93”, “Pecam1”,“Birc5”, “Ccnb2”, “Icos”, “Nkg7”, “Ccr7”, “H2-DMb1”, “Hdc”, “G0s2”, “Cadm4”, “Itih5”, “Hba-a2”, “Hbb-bs”, “Acp5”, “Mmp9”, “Ccl21a”, “Lyve1”
#FIB2:
VlnPlot(mosk, features = c("Des", "Rgs5"), sort = 'decreasing')
#MYL:
VlnPlot(mosk, features = c("C1qb", "Pf4"), sort = 'decreasing')
#FIB-3:
VlnPlot(mosk, features = c("Eln", "Ogn"), sort = 'decreasing')
#ENDO:
VlnPlot(mosk, features = c("Cd93", "Pecam1"), sort = 'decreasing')
#FIB-4:
VlnPlot(mosk, features = c("Birc5", "Ccnb2"), sort = 'decreasing')
#TC:
VlnPlot(mosk, features = c("Icos", "Nkg7"), sort = 'decreasing')
#BC:
VlnPlot(mosk, features = c("Ccr7", "H2-DMb1"), sort = 'decreasing')
#FIB-5:
VlnPlot(mosk, features = c("Hdc", "G0s2"), sort = 'decreasing')
#SCH:
VlnPlot(mosk, features = c("Cadm4", "Itih5"), sort = 'decreasing') + labs(title = "SCH")
#RBC:
VlnPlot(mosk, features = c("Hba-a2", "Hbb-bs"), sort = 'decreasing')
#DEN:
VlnPlot(mosk, features = c("Acp5", "Mmp9"), sort = 'decreasing')
#LYME:
VlnPlot(mosk, features = c("Ccl21a", "Lyve1"), sort = 'decreasing')
#mosk.markers <- FindAllMarkers(mosk, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.2) #0.2 is okki
#mosk.markers %>%
# group_by(cluster) %>%
# slice_max(n = 2, order_by = avg_log2FC)
#mosk.markers %>%
# group_by(cluster) %>%
# top_n(n = 10, wt = avg_log2FC) -> top10
#DoHeatmap(mosk, features = top10$gene)
#+ NoLegend()
#all in one:
#FeaturePlot(mosk, features = c("Crabp1", "Des", "Rgs5", "C1qb", "Pf4", "Eln", "Ogn", "Cd93", "Pecam1","Birc5", "Ccnb2", "Icos", "Nkg7", "Ccr7", "H2-DMb1", "Hdc", "G0s2", "Cadm4", "Itih5", "Hba-a2", "Hbb-bs", "Acp5", "Mmp9", "Ccl21a", "Lyve1"))
FeaturePlot(mosk, features = "Crabp1")
FeaturePlot(mosk, features = c("Des", "Rgs5", "C1qb", "Pf4", "Eln", "Ogn", "Cd93", "Pecam1","Birc5", "Ccnb2", "Icos", "Nkg7"))
FeaturePlot(mosk, features = c("Ccr7", "H2-DMb1", "Hdc", "G0s2", "Cadm4", "Itih5", "Hba-a2", "Hbb-bs", "Acp5", "Mmp9", "Ccl21a", "Lyve1"))
new.cluster.ids <- c("FIB-1", "FIB-3", "MYL", "FIB-2", "BC", "LYME", "FIB-4", "RBC", "ENDO", "TC", "FIB-5", "SCH", "DEN")
names(new.cluster.ids) <- levels(mosk)
mosk <- RenameIdents(mosk, new.cluster.ids)
DimPlot(mosk, reduction = "umap", label = TRUE, pt.size = 0.5)
markers.to.plot <- c("Crabp1",
"Hba-a2", "Hbb-bs",
"Eln", "Ogn",
"Ccr7", "H2-DMb1",
"C1qb", "Pf4",
"Des", "Rgs5",
"Cd93", "Pecam1",
"Ccl21a", "Lyve1",
"Birc5", "Ccnb2",
"Nkg7",
"Hdc", "G0s2",
"Acp5", "Mmp9")
DotPlot(
mosk,
assay = NULL,
features = markers.to.plot,
cols = c("lightgrey", "blue"),
col.min = -2.5,
col.max = 2.5,
dot.min = 0,
dot.scale = 6,
idents = NULL,
group.by = NULL,
split.by = NULL,
cluster.idents = FALSE,
scale = TRUE,
scale.by = "radius",
scale.min = NA,
scale.max = NA
) + RotatedAxis()
installing LIANA
if (!requireNamespace(“BiocManager”, quietly = TRUE)) install.packages(“BiocManager”)
if (!requireNamespace(“remotes”, quietly = TRUE)) install.packages(“remotes”)
remotes::install_github(‘saezlab/liana’, force = TRUE) ###devtools::install_github(‘saezlab/liana’, force = TRUE)
library(liana) liana_path <- system.file(package = “liana”) liana_test <- liana_wrap(mosk, resource = c(“Consensus”))
liana_test %>% dplyr::glimpse() ###=> list of 5: results through natmi, connectome, logfc, sca, cellphonedb ###extracting results only those via CellPhoneDB liana_test_cpdb <- liana_test$cellphonedb liana_test_cpdb CrossTalkR_input <- select(liana_test_cpdb, source, ligand, target, receptor, lr.mean) write.csv(CrossTalkR_input, file=“/Users/xiaoyizheng/Downloads/Praktikum/mosk12_liana_res.csv”)
###we probably dont want to aggregate since we only need for now from CellPhoneDB right? ###liana_test <- liana_test %>% liana_aggregate() ###dplyr::glimpse(liana_test) ###liana_test %>% ### liana_dotplot(source_groups = c(“FIB-1”), ### target_groups = c(“FIB-2”, “FIB-3”, “FIB-4”), ### ntop = 20)
install_github(“hadley/devtools”) install.packages(“devtools”) devtools::install_github(“https://github.com/CostaLab/CrossTalkeR”, force = TRUE, build_vignettes = TRUE) require(CrossTalkeR)
suppressPackageStartupMessages({require(CrossTalkeR)}) suppressPackageStartupMessages({require(igraph)}) suppressPackageStartupMessages({require(ggraph)}) suppressPackageStartupMessages({require(ggplot2)})
paths <- c(‘mosk12’ = system.file(“/Users/xiaoyizheng/Downloads/Praktikum”, “mosk12_liana_res.csv”, package = “CrossTalkeR”)) ###why system.file() here? it returns paths, but cant we just directly use path or something?
genes <- c(‘Hba-a2’) data <- generate_report(paths, genes, out_path=paste0(getwd(),‘/html/’), threshold=0, out_file = ‘mosk12_test.html’, output_fmt = “html_document”, report = TRUE)